pacman::p_load(dplyr, tidyverse, readxl, tidyr, stringr, data.table, raster, sf)
rm(list=ls())
################################################################################

###################################### Data from cropland surveys

######################### Argentina

df <- read_delim("../../data/landuse/raw/Estimaciones.csv", delim = ";", skip = 1, locale = locale(encoding = "ISO-8859-1"),show_col_types = FALSE)
colnames(df) <- c("state_id", "state", "county_id", "county", "crop_id", "crop", "campaign_id", "campaign",
                  "area_planted_ha", "area_harvested_ha", "production_t", "yield_kgha")
df <- df %>% mutate(year = str_sub(campaign, 1, 4), yield_tha = yield_kgha / 1000)
df <- df %>% dplyr::select(county, state, year, crop, area_planted_ha, area_harvested_ha, production_t, yield_tha)
df$county <- str_replace_all(df$county, c(
  "1 DE MAYO" = "PRIMERO DE MAYO",
  "12 DE OCTUBRE" = "DOCE DE OCTUBRE",
  "2 DE ABRIL" = "DOS DE ABRIL",
  "25 DE MAYO" = "VEINTICINCO DE MAYO",
  "25 DE  MAYO" = "VEINTICINCO DE MAYO",
  "9 DE JULIO" = "NUEVE DE JULIO",
  "ADOLFO GONZALES CHAVES" = "GONZALES CHAVES",
  "CHOS  MALAL" = "CHOS MALAL",
  "DR. MANUEL BELGRANO" = "CAPITAL",
  "FLORENTINO AMEGHINO" = "AMEGHINO",
  "GENERAL ANGEL V PENALOZA" = "GENERAL ANGEL V. PENALOZA",
  "JUAN B ALBERDI" = "JUAN B. ALBERDI",
  "JUAN F IBARRA" = "JUAN F. IBARRA",
  "LEANDRO N ALEM" = "LEANDRO N. ALEM",
  "MAYOR LUIS J FONTANA" = "MAYOR LUIS J. FONTANA",
  "PRESIDENTE DE LA PLAZA" = "PRESIDENCIA DE LA PLAZA",
  "GENERAL JUAN MADARIAGA" = "GENERAL MADARIAGA",
  "GUAYASAN" = "GUASAYAN",
  "LA CANDELARIA" = "CANDELARIA",
  "CORONEL DE MARINA L ROSALES" = "CORONEL ROSALES",
  "GENERAL LA MADRID" = "GENERAL LAMADRID",
  "GENERAL JUAN F QUIROGA" = "GENERAL JUAN F. QUIROGA",
  "SENGUERR" = "SENGUER",
  "GENERAL SARMIENTO" = "CAPITAL",
  "LA CAPITAL" = "CAPITAL",
  "JOSE CLEMENTE PAZ" = "JOSE C. PAZ",
  "SAN MIGUEL DE TUCUMAN" = "CAPITAL",
  "JUAN MARTIN DE PUEYRREDON" = "CAPITAL",
  "SILPICA" = "SILIPICA"))
df$crop <- str_replace_all(df$crop, c(
  "Ajo" = "garlic",
  "Algodón" = "cotton",
  "Alpiste" = "birdseed",
  "Arroz" = "rice",
  "Arveja" = "peas",
  "Avena" = "oats",
  "Banana" = "banana",
  "Caña de azúcar" = "sugarcane",
  "Cebada cervecera" = "barley beer",
  "Cebada forrajera" = "barley forage",
  "Cebada total" = "barley",
  "Cebolla total" = "onion",
  "Centeno" = "rye",
  "Colza" = "rapeseed",
  "Cártamo" = "safflower",
  "Garbanzo" = "chickpeas",
  "Girasol" = "sunflower",
  "Jojoba" = "jojoba",
  "Lenteja" = "lentils",
  "Limón" = "lemon",
  "Lino" = "flax",
  "Mandarina" = "mandarin",
  "Maní" = "peanut",
  "Maíz" = "maize",
  "Mijo" = "millet",
  "Naranja" = "orange",
  "Papa total" = "potato",
  "Pomelo" = "grapefruit",
  "Poroto seco" = "beans",
  "Soja 1ra" = "soybean1",
  "Soja 2da" = "soybean2",
  "Soja total" = "soybean",
  "Sorgo" = "sorghum",
  "Trigo candeal" = "wheat durum",
  "Trigo total" = "wheat",
  "Tung" = "tung",
  "Té" = "tea",
  "Yerba mate" = "yerba mate"))
df_1<-df

#Merge on counties
geo_df <- read_csv("../../data/landuse/clean/geographicunits_argentina.csv.gz",show_col_types = FALSE)
df_2 <- geo_df %>% dplyr::select(county_id, county, state)
df_c <- df_2 %>% right_join(df_1, by = c("county", "state"))

#Merge on states
df_3 <- geo_df %>% group_by(country, state, state_id, region_id, region) %>%
  summarise(across(where(is.numeric), sum, na.rm = TRUE), .groups = "drop")
df_c <- df_3 %>% right_join(df_c, by = "state") %>% dplyr::select(-land_area_km2, -population)
write_csv(df_c, "../../data/landuse/clean/cropland_argentina_county_unedited.csv.gz")


######################### Brazil - CONAB

df <- read_excel("../../data/landuse/raw/BrasilProdutoSerieHist.xls",sheet = 1, skip = 5)
df <- head(df, -3)
colnames(df)[-1] <- substr(colnames(df)[-1], 1, 4)
df <- df %>% rename(crop = PRODUTO)
df$crop <- str_replace_all(df$crop, c(
  'ALGODÃO' = 'cotton',
  'AMENDOIM 1ª SAFRA' = 'peanut1',
  'AMENDOIM 2ª SAFRA' = 'peanut2',
  'AMENDOIM TOTAL' = 'peanut',
  'ARROZ' = 'rice',
  'AVEIA' = 'oats',
  'CANOLA' = 'rapeseed',
  'CENTEIO' = 'rye',
  'CEVADA' = 'barley',
  'FEIJÃO 1ª SAFRA' = 'beans1',
  'FEIJÃO 2ª SAFRA' = 'beans2',
  'FEIJÃO 3ª SAFRA' = 'beans3',
  'FEIJÃO TOTAL' = 'beans',
  'GERGELIM' = 'sesame',
  'GIRASSOL' = 'sunflower',
  'MAMONA' = 'castor',
  'MILHO 1ª SAFRA' = 'maize1',
  'MILHO 2ª SAFRA' = 'maize2',
  'MILHO 3ª SAFRA' = 'maize3',
  'MILHO TOTAL' = 'maize',
  'SOJA' = 'soybean',
  'SORGO' = 'sorghum',
  'TRIGO' = 'wheat',
  'TRITICALE' = 'triticale'))
df_1 <- df %>% pivot_longer(cols = -crop,names_to = "year",values_to = "area_planted_ha") %>% mutate(area_planted_ha = area_planted_ha * 1000)

df <- read_excel("../../data/landuse/raw/BrasilProdutoSerieHist.xls",sheet = 2, skip = 5)
df <- head(df, -3)
colnames(df)[-1] <- substr(colnames(df)[-1], 1, 4)
df <- df %>% rename(crop = PRODUTO)
df$crop <- str_replace_all(df$crop, c(
  'ALGODÃO - CAROÇO' = 'cotton',
  'AMENDOIM 1ª SAFRA' = 'peanut1',
  'AMENDOIM 2ª SAFRA' = 'peanut2',
  'AMENDOIM TOTAL' = 'peanut',
  'ARROZ' = 'rice',
  'AVEIA' = 'oats',
  'CANOLA' = 'rapeseed',
  'CENTEIO' = 'rye',
  'CEVADA' = 'barley',
  'FEIJÃO 1ª SAFRA' = 'beans1',
  'FEIJÃO 2ª SAFRA' = 'beans2',
  'FEIJÃO 3ª SAFRA' = 'beans3',
  'FEIJÃO TOTAL' = 'beans',
  'GERGELIM' = 'sesame',
  'GIRASSOL' = 'sunflower',
  'MAMONA' = 'castor',
  'MILHO 1ª SAFRA' = 'maize1',
  'MILHO 2ª SAFRA' = 'maize2',
  'MILHO 3ª SAFRA' = 'maize3',
  'MILHO TOTAL' = 'maize',
  'SOJA' = 'soybean',
  'SORGO' = 'sorghum',
  'TRIGO' = 'wheat',
  'TRITICALE' = 'triticale'))
df_2 <- df %>% pivot_longer(cols = -crop, names_to = "year", values_to = "yield_tha") %>% mutate(yield_tha = yield_tha / 1000)

df <- read_excel("../../data/landuse/raw/BrasilProdutoSerieHist.xls",sheet = 3,skip = 5)
df <- head(df, -3)
colnames(df)[-1] <- substr(colnames(df)[-1], 1, 4)
df <- df %>% rename(crop = PRODUTO)
df$crop <- str_replace_all(df$crop, c(
  'ALGODÃO - CAROÇO' = 'cotton',
  'AMENDOIM 1ª SAFRA' = 'peanut1',
  'AMENDOIM 2ª SAFRA' = 'peanut2',
  'AMENDOIM TOTAL' = 'peanut',
  'ARROZ' = 'rice',
  'AVEIA' = 'oats',
  'CANOLA' = 'rapeseed',
  'CENTEIO' = 'rye',
  'CEVADA' = 'barley',
  'FEIJÃO 1ª SAFRA' = 'beans1',
  'FEIJÃO 2ª SAFRA' = 'beans2',
  'FEIJÃO 3ª SAFRA' = 'beans3',
  'FEIJÃO TOTAL' = 'beans',
  'GERGELIM' = 'sesame',
  'GIRASSOL' = 'sunflower',
  'MAMONA' = 'castor',
  'MILHO 1ª SAFRA' = 'maize1',
  'MILHO 2ª SAFRA' = 'maize2',
  'MILHO 3ª SAFRA' = 'maize3',
  'MILHO TOTAL' = 'maize',
  'SOJA' = 'soybean',
  'SORGO' = 'sorghum',
  'TRIGO' = 'wheat',
  'TRITICALE' = 'triticale'))
df_3 <- df %>% pivot_longer(cols = -crop,names_to = "year", values_to = "production_t" ) %>%  mutate(production_t = production_t * 1000)

df <- read_excel("../../data/landuse/raw/CanaSerieHistZ-ZreaZTotal.xls", sheet = 6, skip = 5)
df <- df[(nrow(df) - 2), , drop = FALSE]
colnames(df)[-1] <- substr(colnames(df)[-1], 1, 4)
df <- df %>% rename(crop = 'REGIÃO/UF')
df$crop <- recode(df$crop, 'BRASIL' = 'sugarcane')
df <- df %>%pivot_longer(cols = -crop,names_to = "year",values_to = "area_planted_ha") %>% mutate(area_planted_ha = area_planted_ha * 1000,production_t = NA_real_,yield_tha = NA_real_)
df_s <- df

#Merge
df <- df_1 %>% inner_join(df_3, by = c("year", "crop")) %>% inner_join(df_2, by = c("year", "crop"))
df <- df %>% dplyr::select(year, crop, area_planted_ha, production_t, yield_tha)%>%arrange(year)
df <- bind_rows(df, df_s)
write_csv(df, "../../data/landuse/clean/cropland_brazil_nation_conab_unedited.csv.gz")


######################### Brazil - PAM

sheet_names <- c("area", "production", "yield", "value")
df_list <- list()
sheet_n <- 0
for (variable in sheet_names) {
  
  file_path <- paste0("../../data/landuse/raw/tabela5457_", variable, ".xlsx")
  dfm <- read_excel(file_path, sheet = 1, .name_repair = "unique_quiet")
  df <- dfm[-nrow(dfm), ]
  df <- df[-c(1, 2), ][, -2]
  df <- as.data.frame(t(df))
  df[1, 1] <- "year"
  df[1, 2] <- "crop"
  colnames(df) <- df[1, ]
  df <- df[-1, ]
  years <- rep(1990:2019, each = length(unique(df$crop)))
  df$year <- years
  df[df == "-" | df == "..."] <- 0
  
  dfx <- df
  df$crop <- dfx$crop
  df_long <- pivot_longer(df, cols = -c(year, crop), names_to = "county_id", values_to = "value")
  df_long$variable <- variable
  df_long$crop <- str_replace_all(df_long$crop, c(
    "Algodão herbáceo \\(em caroço\\)" = "cotton",
    "Arroz \\(em casca\\)" = "rice",
    "Aveia \\(em grão\\)" = "oats",
    "Café \\(em grão\\) Total" = "coffee",
    "Cana-de-açúcar" = "sugarcane",
    "Centeio \\(em grão\\)" = "rye",
    "Cevada \\(em grão\\)" = "barley",
    "Feijão \\(em grão\\)" = "beans",
    "Girassol \\(em grão\\)" = "sunflower",
    "Milho \\(em grão\\)" = "maize",
    "Soja \\(em grão\\)" = "soybean",
    "Sorgo \\(em grão\\)" = "sorghum",
    "Trigo \\(em grão\\)" = "wheat"))
  
  df_list[[length(df_list) + 1]] <- df_long
  sheet_n <- sheet_n + 1
}

df_f <- bind_rows(df_list)
df_f <- df_f %>% mutate(value = if_else(variable == "yield", as.numeric(value) / 1000, as.numeric(value)),
                        value = if_else(variable == "value", value * 1000, value),
                        variable = recode(variable,"area" = "area_planted_ha","production" = "production_t","yield" = "yield_tha","value" = "value_r"))
df_1 <- df_f


######################### Brazil - PAM - First and second harvest maize

sheet_names <- c("area", "production", "yield")
df_list <- list()
sheet_n <- 1
for (variable in sheet_names){
  
  file_path <- paste0("../../data/landuse/raw/tabela839.xlsx")
  dfm <- read_excel(file_path, sheet = sheet_n, .name_repair = "unique_quiet")
  
  df <- dfm[-nrow(dfm), ]
  df <- df[-c(1, 2), ][, -2]
  df <- as.data.frame(t(df))
  df[1, 1] <- "year"
  df[1, 2] <- "crop"
  colnames(df) <- df[1, ]
  df <- df[-1, ]
  
  years <- rep(2003:2019, each = length(unique(df$crop)))
  df$year <- years
  df[df == "-" | df == "..."] <- 0
  dfx <- df
  df$crop <- dfx$crop

  df_long <- pivot_longer(df, cols = -c(year, crop), names_to = "county_id", values_to = "value")
  df_long$variable <- variable
  df_long <- df_long %>% mutate( crop = case_when(str_detect(crop, "1ª") ~ "maize1",
                                                  str_detect(crop, "2ª") ~ "maize2",
                                                  TRUE ~ crop))
  df_list[[length(df_list) + 1]] <- df_long
  sheet_n <- sheet_n + 1
}

df_f <- bind_rows(df_list)
df_2 <- df_f %>% filter(variable %in% c("area", "production", "yield")) %>%
  mutate(value = if_else(variable == "yield", as.numeric(value) / 1000, as.numeric(value)),
         variable = recode(variable,"area" = "area_planted_ha","production" = "production_t","yield" = "yield_tha"))
#Merge
df <- bind_rows(df_1, df_2)
df$county_id <- paste0("BR", df$county_id)
df <- df[order(df$county_id), ]
df <- df[, c("year", "county_id", setdiff(names(df), c("year", "county_id")))]
write_csv(df, "../../data/landuse/clean/cropland_brazil_county_unedited.csv.gz")


